import os

import docx
from docx import Document
from openpyxl import Workbook

from tools import *
workbook = Workbook()
booksheet = workbook.active
dir = r'D:\python_test'
file = '123.docx'
f = docx.Document(dir+file)
level = ''
#遍历文档里的段落
for para in f.paragraphs:
    bookname = ''
    auther = ''
    publiser = ''
    resource = '123'
    text = para.text
    if len(text) == 0:
        continue

    text = filter(text)        #用于过滤数据
    textlist=text.split('    ')
    if len(textlist) == 1:
        level = textlist[0]
        print('level1',level)
        continue
    print('level2',level)
    while ' ' in textlist:
        textlist.remove('')
    list = []
    if bookname(textlist[0].strip()):
        bookname = filter(textlist[0].strip(),'[1-9]\d*.')
        print(bookname)
    else:
        continue
    list.append(bookname.strip())
    list.append(textlist[1].strip())
    list.append(publiser.strip())
    list.append(resource.strip())
    list.append(level.strip())
    booksheet.append(list)
workbook.save(file.split('.')[0]+'.xlsx')
f = docx.Document(dir+file)
for para in f.paragraphs:
    text = para.text
    print(text)
from openpyxl import Workbook
workbook = Workbook()
booksheet = workbook.active
list = ['《大卫上学去》','[美]大卫·香农','','南京亲近母语2017年书目','一年级课程书目（图画书书目']
booksheet.append(list)
workbook.save(file.split('.')[0]+'.xlsx')
